The perform action of this still needs work to do the right thing.
In particular, it currently behaves as if --others was always set.
And, it duplicates a lot of code from addcomputed.
import qualified Command.MaxSize
import qualified Command.Sim
import qualified Command.AddComputed
+import qualified Command.Recompute
import qualified Command.Version
import qualified Command.RemoteDaemon
#ifdef WITH_ASSISTANT
, Command.MaxSize.cmd
, Command.Sim.cmd
, Command.AddComputed.cmd
+ , Command.Recompute.cmd
, Command.Version.cmd
, Command.RemoteDaemon.cmd
#ifdef WITH_ASSISTANT
import Annex.CatFile
import Annex.Content.Presence
import Annex.Ingest
-import Types.RemoteConfig
import Types.KeySource
import Messages.Progress
import Logs.Location
seek' :: AddComputedOptions -> CommandSeek
seek' o = do
r <- getParsed (computeRemote o)
- unless (Remote.typename (Remote.remotetype r) == Remote.typename Remote.Compute.remote) $
+ unless (Remote.Compute.isComputeRemote r) $
giveup "That is not a compute remote."
- let rc = unparsedRemoteConfig (Remote.config r)
- case Remote.Compute.getComputeProgram rc of
- Left err -> giveup $
- "Problem with the configuration of the compute remote: " ++ err
- Right program -> commandAction $ start o r program
+ commandAction $ start o r
-start :: AddComputedOptions -> Remote -> Remote.Compute.ComputeProgram -> CommandStart
-start o r program = starting "addcomputed" ai si $ perform o r program
+start :: AddComputedOptions -> Remote -> CommandStart
+start o r = starting "addcomputed" ai si $ perform o r
where
ai = ActionItemUUID (Remote.uuid r) (UnquotedString (Remote.name r))
si = SeekInput (computeParams o)
-perform :: AddComputedOptions -> Remote -> Remote.Compute.ComputeProgram -> CommandPerform
-perform o r program = do
+perform :: AddComputedOptions -> Remote -> CommandPerform
+perform o r = do
+ program <- Remote.Compute.getComputeProgram r
repopath <- fromRepo Git.repoPath
subdir <- liftIO $ relPathDirToFile repopath (literalOsPath ".")
let state = Remote.Compute.ComputeState
showOutput
Remote.Compute.runComputeProgram program state
(Remote.Compute.ImmutableState False)
- (getinputcontent fast)
+ (getInputContent fast)
(go starttime fast)
next $ return True
where
- getinputcontent fast p = catKeyFile p >>= \case
- Just inputkey -> do
- obj <- calcRepo (gitAnnexLocation inputkey)
- if fast
- then return (inputkey, Nothing)
- else ifM (inAnnex inputkey)
- ( return (inputkey, Just obj)
- , giveup $ "The computation needs the content of a file which is not present: " ++ fromOsPath p
- )
- Nothing -> ifM (liftIO $ doesFileExist p)
- ( giveup $ "The computation needs an input file that is not an annexed file: " ++ fromOsPath p
- , giveup $ "The computation needs an input file which does not exist: " ++ fromOsPath p
- )
-
go starttime fast state tmpdir = do
endtime <- liftIO currentMonotonicTimestamp
let ts = calcduration starttime endtime
isreproducible state = case reproducible o of
Just v -> isReproducible v
Nothing -> Remote.Compute.computeReproducible state
+
+getInputContent :: Bool -> OsPath -> Annex (Key, Maybe OsPath)
+getInputContent fast p = catKeyFile p >>= \case
+ Just inputkey -> do
+ obj <- calcRepo (gitAnnexLocation inputkey)
+ if fast
+ then return (inputkey, Nothing)
+ else ifM (inAnnex inputkey)
+ ( return (inputkey, Just obj)
+ , giveup $ "The computation needs the content of a file which is not present: " ++ fromOsPath p
+ )
+ Nothing -> ifM (liftIO $ doesFileExist p)
+ ( giveup $ "The computation needs an input file that is not an annexed file: " ++ fromOsPath p
+ , giveup $ "The computation needs an input file which does not exist: " ++ fromOsPath p
+ )
--- /dev/null
+{- git-annex command
+ -
+ - Copyright 2025 Joey Hess <id@joeyh.name>
+ -
+ - Licensed under the GNU AGPL version 3 or higher.
+ -}
+
+{-# LANGUAGE OverloadedStrings #-}
+
+module Command.Recompute where
+
+import Command
+import qualified Git
+import qualified Annex
+import qualified Remote.Compute
+import qualified Remote
+import qualified Types.Remote as Remote
+import Annex.CatFile
+import Annex.Content.Presence
+import Annex.Ingest
+import Git.FilePath
+import Types.RemoteConfig
+import Types.KeySource
+import Messages.Progress
+import Logs.Location
+import Utility.Metered
+import Utility.MonotonicClock
+import Backend.URL (fromUrl)
+import Command.AddComputed (Reproducible(..), parseReproducible, getInputContent)
+
+import qualified Data.Map as M
+import Data.Time.Clock
+
+cmd :: Command
+cmd = notBareRepo $
+ command "recompute" SectionCommon "recompute computed files"
+ paramPaths (seek <$$> optParser)
+
+data RecomputeOptions = RecomputeOptions
+ { recomputeThese :: CmdParams
+ , originalOption :: Bool
+ , othersOption :: Bool
+ , reproducible :: Maybe Reproducible
+ , computeRemote :: Maybe (DeferredParse Remote)
+ }
+
+optParser :: CmdParamsDesc -> Parser RecomputeOptions
+optParser desc = RecomputeOptions
+ <$> cmdParams desc
+ <*> switch
+ ( long "original"
+ <> help "recompute using original content of input files"
+ )
+ <*> switch
+ ( long "others"
+ <> help "stage other files that are recomputed in passing"
+ )
+ <*> parseReproducible
+ <*> optional (mkParseRemoteOption <$> parseRemoteOption)
+
+seek :: RecomputeOptions -> CommandSeek
+seek o = startConcurrency commandStages (seek' o)
+
+seek' :: RecomputeOptions -> CommandSeek
+seek' o = do
+ computeremote <- maybe (pure Nothing) (Just <$$> getParsed)
+ (computeRemote o)
+ let seeker = AnnexedFileSeeker
+ { startAction = const $ start o computeremote
+ , checkContentPresent = Nothing
+ , usesLocationLog = True
+ }
+ withFilesInGitAnnex ww seeker
+ =<< workTreeItems ww (recomputeThese o)
+ where
+ ww = WarnUnmatchLsFiles "recompute"
+
+start :: RecomputeOptions -> Maybe Remote -> SeekInput -> OsPath -> Key -> CommandStart
+start o (Just computeremote) si file key =
+ stopUnless (notElem (Remote.uuid computeremote) <$> loggedLocations key) $
+ start' o computeremote si file key
+start o Nothing si file key = do
+ rs <- catMaybes <$> (mapM Remote.byUUID =<< loggedLocations key)
+ case sortOn Remote.cost $ filter Remote.Compute.isComputeRemote rs of
+ [] -> stop
+ (r:_) -> start' o r si file key
+
+start' :: RecomputeOptions -> Remote -> SeekInput -> OsPath -> Key -> CommandStart
+start' o r si file key =
+ Remote.Compute.getComputeState
+ (Remote.remoteStateHandle r) key >>= \case
+ Nothing -> stop
+ Just state ->
+ stopUnless (shouldrecompute state) $
+ starting "recompute" ai si $
+ perform o r file key state
+ where
+ ai = mkActionItem (key, file)
+
+ shouldrecompute state
+ | originalOption o = return True
+ | otherwise =
+ anyM (inputchanged state) $
+ M.toList (Remote.Compute.computeInputs state)
+
+ inputchanged state (inputfile, inputkey) = do
+ -- Note that the paths from the remote state are not to be
+ -- trusted to point to a file in the repository, but using
+ -- the path with catKeyFile will only succeed if it
+ -- is checked into the repository.
+ p <- fromRepo $ fromTopFilePath $ asTopFilePath $
+ Remote.Compute.computeSubdir state </> inputfile
+ catKeyFile p >>= return . \case
+ Just k -> k /= inputkey
+ -- When an input file is missing, go ahead and
+ -- recompute. This way, the user will see the
+ -- computation fail, with an error message that
+ -- explains the problem.
+ -- XXX check that this works well
+ Nothing -> True
+
+perform :: RecomputeOptions -> Remote -> OsPath -> Key -> Remote.Compute.ComputeState -> CommandPerform
+perform o r file key oldstate = do
+ program <- Remote.Compute.getComputeProgram r
+ let recomputestate = oldstate
+ { Remote.Compute.computeInputs = mempty
+ , Remote.Compute.computeOutputs = mempty
+ }
+ fast <- Annex.getRead Annex.fast
+ starttime <- liftIO currentMonotonicTimestamp
+ showOutput
+ Remote.Compute.runComputeProgram program recomputestate
+ (Remote.Compute.ImmutableState False)
+ (getinputcontent program fast)
+ (go starttime fast)
+ next $ return True
+ where
+ getinputcontent program fast p
+ | originalOption o =
+ case M.lookup p (Remote.Compute.computeInputs oldstate) of
+ Just inputkey -> return (inputkey, Nothing)
+ Nothing -> Remote.Compute.computationBehaviorChangeError program
+ "requesting a new input file" p
+ | otherwise = getInputContent fast p
+
+ go starttime fast state tmpdir = do
+ endtime <- liftIO currentMonotonicTimestamp
+ let ts = calcduration starttime endtime
+ let outputs = Remote.Compute.computeOutputs state
+ when (M.null outputs) $
+ giveup "The computation succeeded, but it did not generate any files."
+ oks <- forM (M.keys outputs) $ \outputfile -> do
+ showAction $ "adding " <> QuotedPath outputfile
+ k <- catchNonAsync (addfile fast state tmpdir outputfile)
+ (\err -> giveup $ "Failed to ingest output file " ++ fromOsPath outputfile ++ ": " ++ show err)
+ return (outputfile, Just k)
+ let state' = state
+ { Remote.Compute.computeOutputs = M.fromList oks
+ }
+ forM_ (mapMaybe snd oks) $ \k -> do
+ Remote.Compute.setComputeState
+ (Remote.remoteStateHandle r)
+ k ts state'
+ logChange NoLiveUpdate k (Remote.uuid r) InfoPresent
+
+ addfile fast state tmpdir outputfile
+ | fast = do
+ addSymlink outputfile stateurlk Nothing
+ return stateurlk
+ | isreproducible state = do
+ sz <- liftIO $ getFileSize outputfile'
+ metered Nothing sz Nothing $ \_ p ->
+ ingestwith $ ingestAdd p (Just ld)
+ | otherwise = ingestwith $
+ ingestAdd' nullMeterUpdate (Just ld) (Just stateurlk)
+ where
+ stateurl = Remote.Compute.computeStateUrl r state outputfile
+ stateurlk = fromUrl stateurl Nothing True
+ outputfile' = tmpdir </> outputfile
+ ld = LockedDown ldc $ KeySource
+ { keyFilename = outputfile
+ , contentLocation = outputfile'
+ , inodeCache = Nothing
+ }
+ ingestwith a = a >>= \case
+ Nothing -> giveup "key generation failed"
+ Just k -> do
+ logStatus NoLiveUpdate k InfoPresent
+ return k
+
+ ldc = LockDownConfig
+ { lockingFile = True
+ , hardlinkFileTmpDir = Nothing
+ , checkWritePerms = True
+ }
+
+ calcduration (MonotonicTimestamp starttime) (MonotonicTimestamp endtime) =
+ fromIntegral (endtime - starttime) :: NominalDiffTime
+
+ isreproducible state = case reproducible o of
+ Just v -> isReproducible v
+ Nothing -> Remote.Compute.computeReproducible state
module Remote.Compute (
remote,
+ isComputeRemote,
ComputeState(..),
setComputeState,
- getComputeStates,
+ getComputeState,
computeStateUrl,
ComputeProgram,
getComputeProgram,
runComputeProgram,
ImmutableState(..),
+ computationBehaviorChangeError,
defaultComputeParams,
) where
, thirdPartyPopulated = False
}
+isComputeRemote :: Remote -> Bool
+isComputeRemote r = typename (remotetype r) == typename remote
+
gen :: Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> RemoteStateHandle -> Annex (Maybe Remote)
-gen r u rc gc rs = case getComputeProgram rc of
+gen r u rc gc rs = case getComputeProgram' rc of
Left _err -> return Nothing
Right program -> do
c <- parsedRemoteConfig remote rc
setupInstance :: SetupStage -> Maybe UUID -> Maybe CredPair -> RemoteConfig -> RemoteGitConfig -> Annex (RemoteConfig, UUID)
setupInstance _ mu _ c _ = do
- ComputeProgram program <- either giveup return (getComputeProgram c)
+ ComputeProgram program <- either giveup return $ getComputeProgram' c
unlessM (liftIO $ inSearchPath program) $
giveup $ "Cannot find " ++ program ++ " in PATH"
u <- maybe (liftIO genUUID) return mu
newtype ComputeProgram = ComputeProgram String
deriving (Show)
-getComputeProgram :: RemoteConfig -> Either String ComputeProgram
-getComputeProgram c = case fromProposedAccepted <$> M.lookup programField c of
+getComputeProgram :: Remote -> Annex ComputeProgram
+getComputeProgram r =
+ case getComputeProgram' (unparsedRemoteConfig (config r)) of
+ Right program -> return program
+ Left err -> giveup $
+ "Problem with the configuration of compute remote " ++ name r ++ ": " ++ err
+
+getComputeProgram' :: RemoteConfig -> Either String ComputeProgram
+getComputeProgram' c = case fromProposedAccepted <$> M.lookup programField c of
Just program
| safetyPrefix `isPrefixOf` program ->
Right (ComputeProgram program)
(mkMetaFieldUnchecked $ T.pack ('t':show (truncateResolution 1 ts)))
(S.singleton (MetaValue (CurrentlySet True) (formatComputeState k st)))
-getComputeStates :: RemoteStateHandle -> Key -> Annex [(NominalDiffTime, ComputeState)]
-getComputeStates rs k = do
+{- When multiple ComputeStates have been recorded for the same key,
+ - this returns one that is probably less expensive to compute,
+ - based on the original time it took to compute it. -}
+getComputeState:: RemoteStateHandle -> Key -> Annex (Maybe ComputeState)
+getComputeState rs k = headMaybe . map snd . sortOn fst
+ <$> getComputeStatesUnsorted rs k
+
+getComputeStatesUnsorted :: RemoteStateHandle -> Key -> Annex [(NominalDiffTime, ComputeState)]
+getComputeStatesUnsorted rs k = do
RemoteMetaData _ (MetaData m) <- getCurrentRemoteMetaData rs k
return $ go [] (M.toList m)
where
let f' = toOsPath f
let knowninput = M.member f' (computeInputs state')
checksafefile tmpdir subdir f' "input"
- checkimmutable knowninput l $ do
+ checkimmutable knowninput "inputting" f' $ do
(k, mp) <- getinputcontent f'
mp' <- liftIO $ maybe (pure Nothing)
(Just <$$> relPathDirToFile subdir)
let f' = toOsPath f
checksafefile tmpdir subdir f' "output"
let knownoutput = M.member f' (computeOutputs state')
- checkimmutable knownoutput l $
+ checkimmutable knownoutput "outputting" f' $
return $ if knownoutput
then state'
else state'
when (any (\p -> dropTrailingPathSeparator p == literalOsPath ".git") (splitPath f)) $
err "inside the .git directory"
- checkimmutable True _ a = a
- checkimmutable False l a
+ checkimmutable True _ _ a = a
+ checkimmutable False requestdesc p a
| not immutablestate = a
- | otherwise = giveup $
- program ++ " is not behaving the same way it used to, now outputting: \"" ++ l ++ "\""
+ | otherwise = computationBehaviorChangeError (ComputeProgram program) requestdesc p
+
+computationBehaviorChangeError :: ComputeProgram -> String -> OsPath -> Annex a
+computationBehaviorChangeError (ComputeProgram program) requestdesc p =
+ giveup $ program ++ " is not behaving the same way it used to, now " ++ requestdesc ++ ": " ++ fromOsPath p
computeKey :: RemoteStateHandle -> ComputeProgram -> Key -> AssociatedFile -> OsPath -> MeterUpdate -> VerifyConfig -> Annex Verification
-computeKey rs (ComputeProgram program) k af dest p vc = do
- states <- map snd . sortOn fst -- least expensive probably
- <$> getComputeStates rs k
- case mapMaybe computeskey states of
- ((keyfile, state):_) -> runComputeProgram
- (ComputeProgram program)
- state
- (ImmutableState True)
- (getinputcontent state)
- (go keyfile)
- [] -> giveup "Missing compute state"
+computeKey rs (ComputeProgram program) k af dest p vc =
+ getComputeState rs k >>= \case
+ Just state ->
+ case computeskey state of
+ Just keyfile -> runComputeProgram
+ (ComputeProgram program)
+ state
+ (ImmutableState True)
+ (getinputcontent state)
+ (go keyfile)
+ Nothing -> missingstate
+ Nothing -> missingstate
where
+ missingstate = giveup "Missing compute state"
+
getinputcontent state f =
case M.lookup (fromOsPath f) (computeInputs state) of
Just inputkey -> do
computeskey state =
case M.keys $ M.filter (== Just k) (computeOutputs state) of
- (keyfile : _) -> Just (keyfile, state)
+ (keyfile : _) -> Just keyfile
[] -> Nothing
go keyfile state tmpdir = do
-- Make sure that the compute state exists.
checkKey :: RemoteStateHandle -> Key -> Annex Bool
checkKey rs k = do
- states <- getComputeStates rs k
+ states <- getComputeStatesUnsorted rs k
if null states
then giveup "Missing compute state"
else return True
reproducible output (except when using `--fast`).
If a computation turns out not to be fully reproducible, then getting
- the file from the compute remote will later fail with a checksum
- verification error. One thing that can be done then is to use
- `git-annex recompute --unreproducible`.
+ a computed file from the compute remote will later fail with a
+ checksum verification error. One thing that can be done then is to use
+ `git-annex recompute --original --unreproducible`.
* Also the [[git-annex-common-options]](1) can be used.
# NAME
-git-annex recompute - update computed files
+git-annex recompute - recompute computed files
# SYNOPSIS
# DESCRIPTION
This updates computed files that were added with
-[[git-annex-addcomputed]](1).
+[[git-annex-addcomputed]](1).
+
+When the output of the computation is different, the updated computed
+file is staged in the repository.
By default, this only recomputes files whose input files have changed.
-The new contents of the input files are used to re-run the computation,
-and when the output is different, the updated computed file is staged
-in the repository.
+The new contents of the input files are used to re-run the computation.
# OPTIONS
-* `--unchanged`
+* `--original`
+
+ Use the original content of input files.
- Recompute files even when their input files have not changed.
+* `--others`
+
+ When recomputing one file also generates new versions of other files,
+ stage those other files in the repository too.
* `--unreproducible`, `-u`
Convert files that were added with `git-annex addcomputed --unreproducible`
to be as if they were added with `--reproducible`.
+* `--remote=name`
+
+ Only recompute files that were computed by this compute remote.
+
+ When this option is not used, all computed files are recomputed using
+ whatever compute remote was originally used to add them. In cases where
+ a file can be computed by multiple remotes, the one with the lowest
+ configured cost will be used.
+
* matching options
The [[git-annex-matching-options]](1) can be used to control what
files to recompute.
- For example, to only recompute files that are computed by the "photoconv"
- compute remote, use `--in=photoconv`
-
* Also the [[git-annex-common-options]](1) can be used.
# SEE ALSO
Command.Proxy
Command.Pull
Command.Push
+ Command.Recompute
Command.ReKey
Command.ReadPresentKey
Command.RecvKey